<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2016-2017 Cask Data, Inc." name="copyright" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>Introduction to CDAP Pipelines</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />

    <script type="text/javascript">
      /*
       * Page-wide options object, defined as a global before the documentation
       * scripts below are loaded (presumably read by doctools.js; confirm
       * against that file).
       */
      var DOCUMENTATION_OPTIONS = {
        // NOTE(review): empty here although this page references its static
        // assets through "../"; confirm that an empty root is intended.
        URL_ROOT:    '',
        // Same release string as the git_release meta tag in this head.
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        // Extension used by the generated pages linked from this document.
        FILE_SUFFIX: '.html',
        // Presumably disables "show source" links; verify against the theme.
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="CDAP Pipelines" href="index.html" />
    <link rel="next" title="Getting Started" href="getting-started.html" />
    <link rel="prev" title="CDAP Pipelines" href="index.html" />
    <!-- block extrahead -->
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    <!-- No maximum-scale here: capping the zoom level stops users from enlarging the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <!-- Icon-only toggle for the collapsed navigation; aria-label supplies its accessible name. -->
          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse" aria-label="Toggle navigation">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <!-- Site search: sends the query as "q" to the search page via GET.
               The placeholder is not a label, so aria-label names the field. -->
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get" role="search">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" aria-label="Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: developer-manual -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a>
      </li>
      <li class="toctree-l1"><b><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting-started/index.html"> 入门指南</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/sandbox/index.html">CDAP Sandbox</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/zip.html">二进制 Zip 文件</a></li>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/zip.html#cdap-sandbox">启动和停止 CDAP Sandbox</a></li>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/virtual-machine.html">虚拟机镜像</a></li>
<li class="toctree-l3"><a class="reference internal" href="../getting-started/sandbox/docker.html">Docker 镜像</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/quick-start.html">快速入门</a></li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/dev-env.html">搭建开发环境</a></li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/start-stop-cdap.html">启动和停止 CDAP</a></li>
<li class="toctree-l2"><a class="reference internal" href="../getting-started/building-apps.html">构建并运行应用</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../overview/index.html"> 概述</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../overview/anatomy.html"> 大数据应用剖析</a></li>
<li class="toctree-l2"><a class="reference internal" href="../overview/modes.html"> 模式和组件</a></li>
<li class="toctree-l2"><a class="reference internal" href="../overview/abstractions.html"> 核心概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../overview/interfaces.html"> 编程接口</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../building-blocks/index.html"> 抽象概念</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/core.html"> Core Abstractions</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/applications.html"> Applications</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/datasets/index.html"> Datasets</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/overview.html"> Overview</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/table.html"> Table API</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/fileset.html"> FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/partitioned-fileset.html"> Partitioned FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/time-partitioned-fileset.html"> TimePartitioned FileSets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/system-custom.html"> System and Custom Datasets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/permissions.html"> Dataset Permissions</a></li>
<li class="toctree-l3"><a class="reference internal" href="../building-blocks/datasets/cube.html"> Cube Dataset</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/mapreduce-programs.html"> MapReduce Programs</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/plugins.html"> Plugins</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/schedules.html"> Schedules</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/secure-keys.html"> Secure Keys</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/services.html"> Services</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/spark-programs.html"> Spark Programs</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/workers.html"> Workers</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/workflows.html"> Workflows</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/artifacts.html"> Artifacts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/program-lifecycle.html"> Program Lifecycle</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/namespaces.html"> Namespaces</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/transaction-system.html"> Transaction System</a></li>
<li class="toctree-l2"><a class="reference internal" href="../building-blocks/transactional-messaging-system.html"> Transactional Messaging System</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../metadata/index.html"> 元数据</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../metadata/system-metadata.html"> System Metadata</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/discovery-lineage.html"> Discovery and Lineage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/field-lineage.html"> Field Level Lineage</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/audit-logging.html"> Audit Logging</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/metadata-ui.html"> CDAP Metadata UI</a></li>
<li class="toctree-l2"><a class="reference internal" href="../metadata/programmatic-metadata.html"> Accessing metadata programmatically</a></li>
</ul>
</li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> 数据流管道</a><ul class="current">
<li class="toctree-l2 current"><a class="current reference internal" href="#"> Concepts and Design</a></li>
<li class="toctree-l2"><a class="reference internal" href="getting-started.html"> Getting Started</a></li>
<li class="toctree-l2"><a class="reference internal" href="studio.html"> CDAP Studio</a></li>
<li class="toctree-l2"><a class="reference internal" href="creating-pipelines.html"> Creating Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="running-pipelines.html"> Running Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="plugin-management.html"> Plugin Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="plugins/index.html"> Plugin Reference</a><ul>
<li class="toctree-l3"><a class="reference internal" href="plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="developing-pipelines.html"> Developing Pipelines</a></li>
<li class="toctree-l2"><a class="reference internal" href="developing-plugins/index.html"> Developing Plugins</a><ul>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/plugin-basics.html">Plugin Basics</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/creating-a-plugin.html">Creating a Plugin</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/presentation-plugins.html">Plugin Presentation</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/testing-plugins.html">Testing Plugins</a></li>
<li class="toctree-l3"><a class="reference internal" href="developing-plugins/packaging-plugins.html">Packaging Plugins</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="how-cdap-pipelines-work.html"> How CDAP Pipelines Work</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../cloud-runtimes/index.html"> 云平台运行</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/concepts/index.html"> Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/provisioners/index.html"> Provisioners</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/provisioners/gcp-dataproc.html">Google Dataproc</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/provisioners/aws-emr.html">Amazon Elastic MapReduce</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/provisioners/remote-hadoop.html">Remote Hadoop</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/profiles/index.html"> Profiles</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/profiles/creating-profiles.html">Creating Profiles</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/profiles/assigning-profiles.html">Assigning Profiles</a></li>
<li class="toctree-l3"><a class="reference internal" href="../cloud-runtimes/profiles/admin-controls.html">Admin Controls</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../cloud-runtimes/example/index.html"> Example</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../security/index.html"> 安全</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../security/client-authentication.html">Client Authentication</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/cdap-authentication-clients-java.html">CDAP Authentication Client for Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/cdap-authentication-clients-python.html">CDAP Authentication Client for Python</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/custom-authentication.html">Custom Authentication</a></li>
<li class="toctree-l2"><a class="reference internal" href="../security/authorization-extensions.html">Authorization Extensions</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../testing/index.html"> 测试和调试</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../testing/testing.html"> Testing a CDAP Application</a></li>
<li class="toctree-l2"><a class="reference internal" href="../testing/debugging.html"> Debugging</a></li>
<li class="toctree-l2"><a class="reference internal" href="../testing/troubleshooting.html"> Troubleshooting</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../ingesting-tools/index.html"> 数据融合</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-stream-clients-java.html">CDAP Stream Client for Java</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-stream-clients-python.html">CDAP Stream Client for Python</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-stream-clients-ruby.html">CDAP Stream Client for Ruby</a></li>
<li class="toctree-l2"><a class="reference internal" href="../ingesting-tools/cdap-flume.html">CDAP Flume</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data-exploration/index.html"> 数据探索</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/filesets.html"> Fileset Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/tables.html"> Table Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/object-mapped-tables.html"> ObjectMappedTable Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/custom-datasets.html"> Custom Dataset Exploration</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-exploration/hive-execution-engines.html"> Hive Execution Engines</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../advanced/index.html"> 高级主题</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../advanced/application-logback.html"> Application Logback</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/best-practices.html"> Best Practices</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/class-loading.html"> Class Loading</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/configuring-resources.html"> Configuring Program Resources</a></li>
<li class="toctree-l2"><a class="reference internal" href="../advanced/program-retry-policies.html"> Program Retry Policies</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="introduction-to-cdap-pipelines">
<span id="cdap-pipelines-concepts-design"></span><h1>Introduction to CDAP Pipelines<a class="headerlink" href="#introduction-to-cdap-pipelines" title="Permalink to this headline">🔗</a></h1>
<p>CDAP Pipelines is a self-service, reconfigurable, extendable framework to develop, run,
automate, and operate <strong>data pipelines</strong> on Hadoop. Completely open source, it is licensed
under the Apache 2.0 license.</p>
<p>Pipelines are a capability of CDAP and include the <em>CDAP Studio</em>, a visual
click-and-drag interface for building data pipelines from an included library of pre-built
plugins.</p>
<p>CDAP provides an operational view of the resulting pipeline that allows for lifecycle
control and monitoring of the metrics, logs, and other runtime information. The pipeline
can be run directly in CDAP with tools such as the CDAP UI, the CDAP CLI, or command line
tools.</p>
<div class="section" id="pipelines">
<h2>Pipelines<a class="headerlink" href="#pipelines" title="Permalink to this headline">🔗</a></h2>
<p>Pipelines are applications—specifically for the processing of data flows—created from artifacts.</p>
<p>An <strong>artifact</strong> is an “application template”. A pipeline application is created by CDAP by
using a <strong>configuration file</strong> that defines the desired application, along with whichever artifacts are
specified inside the configuration. Artifacts for creating data pipelines are supplied
with CDAP.</p>
<div class="section" id="stages-and-plugins">
<h3>Stages and Plugins<a class="headerlink" href="#stages-and-plugins" title="Permalink to this headline">🔗</a></h3>
<p>A pipeline can be viewed as consisting of a series of <em>stages</em>. Each stage is a usage
of a <em>plugin</em>, an extension to CDAP that provides a specific functionality.</p>
<p>A stage’s configuration properties describe what that plugin is to do (read from a
stream, write to a table, run a script), and are dependent on the particular plugin used.</p>
<p>All stages are connected together in a directed acyclic graph (or <em>DAG</em>), which is
shown in the <em>CDAP Studio</em> and in CDAP as a connected series of icons:</p>
<a class="reference internal image-reference" href="../_images/fork-in-pipeline.png"><img alt="Example pipeline with a fork, shown as a connected series of stage icons" class="align-center" src="../_images/fork-in-pipeline.png" style="width: 6in;" /></a>
<p>The general progression in a pipeline is:</p>
<ol class="arabic simple">
<li><strong>Pre-run operations:</strong> any actions required before the pipeline can actually run, such
as preparing resources</li>
<li><strong>Data acquisition:</strong> obtaining data from a source or sources</li>
<li><strong>Data transformation:</strong> manipulating the data acquired from the sources</li>
<li><strong>Data publishing:</strong> saving the results of the transformation, either as additional data to a
data <em>sink</em> or to a report</li>
<li><strong>Post-run operations:</strong> any actions required once the pipeline run has completed, such
as emailing notifications or cleaning up resources, regardless of whether the pipeline run
succeeded or failed</li>
</ol>
<p>Different plugins are available to provide functionality for each stage.</p>
</div>
<div class="section" id="data-and-control-flow">
<h3>Data and Control Flow<a class="headerlink" href="#data-and-control-flow" title="Permalink to this headline">🔗</a></h3>
<p>Processing in the pipeline is governed by two aspects: <em>data</em> and <em>control</em> flow.</p>
<p><strong>Data flow</strong> is the movement of data, in the form of records, from one step of a pipeline
to another. When data arrives at a stage, it triggers that stage’s processing of the data
and then the transference of results (if any) to the next stage.</p>
<p><strong>Control flow</strong> is a parallel process that triggers a stage based on the result from
another process, independent of the pipeline. Currently, control flow can be applied
<em>only</em> to the <em>initial</em> stages (before any data flow stages run) and <em>final</em> stages (after
all other data flow stages run) of a pipeline. A <em>post-run</em> stage is available after each
pipeline run, successful or otherwise.</p>
</div>
<div class="section" id="logical-and-physical-pipelines">
<h3>Logical and Physical Pipelines<a class="headerlink" href="#logical-and-physical-pipelines" title="Permalink to this headline">🔗</a></h3>
<p>Within CDAP, there is the concept of <em>logical</em> and <em>physical</em> pipelines.</p>
<p>A <strong>logical pipeline</strong> is the view as seen in the CDAP Studio.
It shows the stages, but not the underlying technology used to actually manifest and run the pipeline.</p>
<p>A <strong>physical pipeline</strong> is the manifestation of a logical pipeline as a CDAP application,
which is a collection of programs and services that read and write through the data
abstraction layer in CDAP.</p>
<p>A <em>planner</em> is responsible for converting the logical pipeline to the physical pipeline. The
planner analyzes the logical view of the pipeline and converts it to the CDAP application.</p>
</div>
<div class="section" id="types-of-pipelines">
<h3>Types of Pipelines<a class="headerlink" href="#types-of-pipelines" title="Permalink to this headline">🔗</a></h3>
<p>The data flows of a pipeline can be either <strong>batch</strong> or <strong>real-time</strong>, and a variety of
processing paradigms (MapReduce or Spark) can be used.</p>
<p><strong>Batch applications</strong> can be scheduled to run periodically using a cron expression and can
read data from batch sources using a MapReduce job. The batch application then performs
any of a number of optional transformations before writing to one or more batch sinks.</p>
<p><strong>Real-time applications</strong> are designed to poll sources periodically to fetch the data,
perform any optional transformations required, and then write to one or more real-time
sinks.</p>
<p>The pipelines are created from artifacts, either system artifacts (supplied as part of
CDAP) or user artifacts, <a class="reference internal" href="../building-blocks/artifacts.html#artifacts"><span class="std std-ref">created by a developer</span></a>.</p>
</div>
<div class="section" id="pipeline-lifecycle">
<h3>Pipeline Lifecycle<a class="headerlink" href="#pipeline-lifecycle" title="Permalink to this headline">🔗</a></h3>
<p>Similar to other CDAP applications, pipelines have a lifecycle, and can be managed and
controlled using the tools supplied by CDAP, such as the
<a class="reference internal" href="running-pipelines.html#cdap-pipelines-running-pipelines-within-cdap-pipelines-ui"><span class="std std-ref">CDAP Pipelines UI</span></a>,
the <span class="xref std std-ref">CDAP UI</span>, the <span class="xref std std-ref">CDAP CLI</span>, and command line tools,
using the <span class="xref std std-ref">Lifecycle HTTP RESTful API</span>.</p>
</div>
</div>
<div class="section" id="plugins">
<span id="cdap-pipelines-introduction-what-is-a-plugin"></span><h2>Plugins<a class="headerlink" href="#plugins" title="Permalink to this headline">🔗</a></h2>
<p>Data <em>sources</em>, transformations (called <em>transforms</em> for short), and data <em>sinks</em> are
generically referred to as <em>plugins</em>. Plugins provide a way to extend the functionality
of existing artifacts. An application can be created with the existing plugins included
with CDAP or, if a user wishes, they can write a plugin to add their own capability.</p>
<p>See the <a class="reference internal" href="plugins/index.html#cdap-pipelines-plugins"><span class="std std-ref">reference section</span></a> for details on the capabilities
and behavior of each plugin.</p>
</div>
<div class="section" id="properties">
<h2>Properties<a class="headerlink" href="#properties" title="Permalink to this headline">🔗</a></h2>
<p>Each stage in a pipeline represents the configuration of a specific plugin, and that
configuration usually requires that certain properties be specified. At a minimum, a
unique name for the stage and the plugin being used is required, with any additional
properties required dependent on the particular plugin used.</p>
<p>See the <a class="reference internal" href="plugins/index.html#cdap-pipelines-plugins"><span class="std std-ref">reference section</span></a> for details on the properties
required and supported for each plugin.</p>
</div>
<div class="section" id="schema">
<h2>Schema<a class="headerlink" href="#schema" title="Permalink to this headline">🔗</a></h2>
<p>Each stage of a pipeline that emits data (basically, all stages except for <em>pre-run
operations</em> and <em>data publishing</em>) emits data with a schema that is set for that stage.
Schemas need to match appropriately from stage to stage, and controls within the <em>CDAP Application
Studio</em> allow the propagation of a schema to subsequent stages.</p>
<p>The schema allows you to control which fields and their types are used in all stages of
a pipeline. Certain plugins require specific schemas, and transform plugins are available to
convert data to required formats and schemas.</p>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <!-- Page footer: previous/next page links on either side of the copyright and download links. -->
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="CDAP Pipelines" href="index.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="Getting Started" href="getting-started.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<!-- Page scripts, executed in the order listed; each file is included exactly once. -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script>
<script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script>
<script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script>
<script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script>
<script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
<script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script>
<script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script>
<script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script>
<script type="text/javascript" src="../_static/js/js.cookie.js"></script>
<script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script>
<script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script>
    <!-- Loaded after cdap-version-menu.js (presumably supplies the version list it uses; verify). -->
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>